################################################################################ 
#
# The distribution of hate speech and its implications for content moderation
# PSRM - Replication package
# Table E5
#
################################################################################ 


################################################################################ 
#  LIBRARIES
################################################################################ 

library(dplyr)        
library(readr)        
library(estimatr)     
library(glmnet)       
library(hdm)

# Clear the global environment, keeping only objects whose names start with
# "wd" or are exactly "setsave". Those objects (wd_data_processed, wd_res,
# setsave) are read further down and are not defined in this script --
# presumably a driver script sets them; confirm before running standalone.
rm(list = setdiff(ls(), ls(pattern = "^wd|^setsave$")))

# Fix the random seed for the remainder of the script.
set.seed(42)


################################################################################ 
#   DATA AND FOLDER
################################################################################ 

# Load the analysis data set and the candidate control variables for the LASSO
data <- read.csv(
  paste0(wd_data_processed, '/for_analysis/clean_for_analysis.csv')
)
lassodata <- read_rds(
  paste0(wd_data_processed, '/for_analysis/data_lasso.RDS')
)

# Keep only rows where all four status flags equal 0 (rows with an NA in the
# combined condition are dropped by which()).
data <- data[
  which(
    data$attrition == 0 &
      data$not_auth_sofar == 0 &
      data$suspended_sofar == 0 &
      data$protected_sofar == 0
  ),
]


################################################################################ 
#   Prepare
################################################################################ 

# Outcome columns in `data` and the labels used for them in the output table
# (the two vectors are matched by position).
outvars <- c(
  "anchor_deleted_12h",
  "no_hate_tweets_post_85",
  "hate_rate_85",
  "post_tweets_h_avg"
)
outvars_name <- c(
  "Hate Tweet Deleted <12h",
  "# Hate Tweets",
  "Share of daily hate",
  "Probability of Hate Speech"
)

# Treatment arms (values of data$treat_aggregate) and their display labels
treatments <- c("alert", "consequences", "empathy")
treatments_lab <- c(
  "Alerting of Hate Speech",
  "Warning of Consequences",
  "Empathy"
)


# Standardise the outcomes and every LASSO control with scale()
# (centre on the mean, divide by the standard deviation).
data[outvars] <- lapply(data[outvars], scale)
lassodata <- as.data.frame(lapply(lassodata, scale))

################################################################################ 
#  MODELS
################################################################################ 

# Fit one double-selection LASSO model per (outcome, treatment arm) pair.
# Models are stored outcome-major: all arms for outvars[1], then all arms for
# outvars[2], and so on. make.container() labels its rows in the same order.
MODELS <- vector("list", length(outvars) * length(treatments))

# `data` and `lassodata` are matched by row position below.
# NOTE(review): `data` was filtered after loading, `lassodata` was not filtered
# in this script -- confirm data_lasso.RDS is already restricted to the same
# rows. A differing row count means the positions cannot line up.
if (nrow(lassodata) != nrow(data)) {
  warning(
    "lassodata has ", nrow(lassodata), " rows but data has ", nrow(data),
    "; rows are matched by position and may be misaligned.",
    call. = FALSE
  )
}

n <- 1

for (j in seq_along(outvars)) {

  # Rows whose outcome is NA, NaN or +/-Inf; depends only on the outcome,
  # so it is computed once per outcome rather than once per arm.
  drop_rows <- which(!is.finite(data[[outvars[j]]]))

  for (i in seq_along(treatments)) {

    # Estimation sample: the current arm plus the control group, minus rows
    # with an unusable outcome (arm rows first, then control rows).
    keep_rows <- union(
      which(data$treat_aggregate == treatments[i]),
      which(data$treat_aggregate == 'control')
    )
    keep_rows <- setdiff(keep_rows, drop_rows)

    y <- data[keep_rows, outvars[j]]
    d <- data$treated[keep_rows]
    x <- as.matrix(lassodata[keep_rows, ])

    print('Data ready')

    # Columns 2 and 3 of the control matrix are passed as I3, i.e. flagged to
    # be included in the model regardless of the LASSO selection.
    forced_controls <- rep(FALSE, ncol(x))
    forced_controls[c(2, 3)] <- TRUE

    MODELS[[n]] <- rlassoEffect(
      x, y, d,
      method = "double selection",
      I3 = forced_controls
    )

    print('Done with: ')
    print(n)

    n <- n + 1
  }
}



################################################################################ 
#   Clean the model outcomes
################################################################################ 

# Collect the estimates of a list of fitted models into one results table.
#
# Args:
#   MOD: list of fitted models (as produced by rlassoEffect() above), ordered
#        outcome-major: all treatments for outvars[1], then outvars[2], ...
#        Its length must equal length(outvars) * length(treatments).
#
# Reads the globals outvars, outvars_name, treatments and treatments_lab to
# label the rows.
#
# Returns:
#   data.frame with one row per model and the columns yvar, yvar_name, treat,
#   treatments_lab, coef, se, pval, samplesize, lasso_cont_num, controls.
make.container = function(MOD){
  # Guard against silent recycling of the labels against a wrong-sized list.
  n_expected <- length(outvars) * length(treatments)
  if (length(MOD) != n_expected) {
    stop(
      "Expected ", n_expected, " models (outcomes x treatments), got ",
      length(MOD), ".",
      call. = FALSE
    )
  }

  # First element of each summary is the coefficient table; positions 1, 2
  # and 4 are read as estimate, standard error and p-value.
  coef_tabs <- lapply(MOD, function(m) summary(m)[[1]])
  est <- vapply(coef_tabs, function(tab) tab[1], numeric(1))
  std_err <- vapply(coef_tabs, function(tab) tab[2], numeric(1))
  p_val <- vapply(coef_tabs, function(tab) tab[4], numeric(1))

  # coefficients.reg holds the final regression; its first two entries are
  # skipped so that only the remaining control terms are counted and listed.
  n_controls <- vapply(
    MOD, function(m) length(m$coefficients.reg) - 2, numeric(1)
  )
  n_obs <- vapply(MOD, function(m) m$samplesize, numeric(1))
  control_names <- vapply(
    MOD,
    function(m) paste0(names(m$coefficients.reg)[-c(1, 2)], collapse = ', '),
    character(1)
  )

  container <- cbind.data.frame(
    yvar = rep(outvars, each = length(treatments)),
    yvar_name = rep(outvars_name, each = length(treatments)),
    treat = rep(treatments, times = length(outvars)),
    treatments_lab = rep(treatments_lab, times = length(outvars)),
    coef = est, se = std_err, pval = p_val,
    samplesize = n_obs,
    lasso_cont_num = n_controls,
    controls = control_names
  )
  return(container)
}


# Build the results table from the fitted models (one row per model).
container = make.container(MODELS)


################################################################################ 
#   Adjust p-values
################################################################################ 


# Benjamini-Hochberg adjustment of p-values, applied separately within each
# treatment label (i.e. across all outcome rows that share a treatment).
#
# Args:
#   data: data.frame with the columns treatments_lab, yvar_name and pval.
#
# Returns:
#   data.frame with the columns treatments_lab, yvar_name, pvals (raw) and
#   pvals_fdr (BH-adjusted), grouped by treatment label in order of first
#   appearance. For input without rows, a 0-row frame with these four columns.
#
# Emits one message per treatment label as a progress indicator.
save_bh_adj = function(data){
  labs <- unique(data$treatments_lab)

  # Nothing to adjust: return an empty frame that still has the columns
  # callers select on.
  if (length(labs) == 0) {
    return(data.frame(
      treatments_lab = character(0),
      yvar_name = character(0),
      pvals = numeric(0),
      pvals_fdr = numeric(0)
    ))
  }

  pieces <- vector("list", length(labs))
  k <- 0
  for (t in labs) {
    k <- k + 1

    message(paste0('Pvals for ', t))
    temp <- data[data$treatments_lab == t, ]

    pvals <- temp$pval
    # n is the number of rows in the group, so an NA p-value still counts
    # as one comparison.
    pvals_fdr <- p.adjust(pvals, method = "BH", n = length(pvals))

    pieces[[k]] <- data.frame(
      treatments_lab = rep(t, length(pvals)),
      yvar_name = temp$yvar_name,
      pvals = as.numeric(pvals),
      pvals_fdr = as.numeric(pvals_fdr)
    )
  }

  # Bind once at the end instead of growing a data.frame inside the loop.
  do.call(rbind, pieces)
}

# Attach the BH-adjusted p-values to the results table, matching rows on
# treatment label and outcome label.
container_bh <- save_bh_adj(data = container)
container <- left_join(
  container,
  container_bh %>% select(treatments_lab, yvar_name, pvals_fdr),
  by = c("treatments_lab", "yvar_name")
)

# Write the table only when saving is switched on, and report what actually
# happened so the log never claims a save that did not take place.
if (setsave) {
  write.csv(container, paste0(wd_res, '/tables/tabE5.csv'), row.names = FALSE)
  cat("\n====================\n")
  cat("Saved Table E5")
  cat("\n====================\n")
} else {
  cat("\n====================\n")
  cat("Table E5 not saved (setsave is FALSE)")
  cat("\n====================\n")
}
